# Updated Replication Code for:
# Does Runoff Disadvantage Women Candidates?
# September 2025

# Load Packages-----

library(tidyverse)
library(jtools)
library(broom)
library(ggeffects)
library(officer)
library(flextable)


# Load Data-----

# Full replication data set, read from the project's Data/ folder.
data <- read_rds("Data/Final Replication Data.rds")

# Rows belonging to a first round.
data_first_round <- filter(data, round == 1)

# Rows kept here: plurality contests, first rounds with no runoff held, and
# second rounds where a runoff was held (presumably the deciding round of
# each election -- confirm against the codebook).
data_final_round <- filter(
  data,
  rule == "Plurality" |
    (round == 1 & runoff_held == 0) |
    (round == 2 & runoff_held == 1)
)


# Stats for Methods Section-----

##  457 elections
nrow(data_final_round)

##  79 countries
n_distinct(data_final_round$state)

## Rule as of 2024
## For each state, take the first row among those with its latest year and
## tabulate the rule recorded on that row.
data_first_round %>%
  group_by(state) %>%
  filter(year == max(year)) %>%
  slice_head(n = 1) %>%
  ungroup() %>%
  count(rule)

## Hard-coded share; presumably read off the counts printed above -- TODO
## confirm after any data update.
63 / 79

## Frequency of each rule
## For every state, collect the rules that appear in its rows and flag
## whether "Plurality", "Runoff", or both are among them.
state_rule <- data_final_round %>%
  distinct(state, rule) %>%
  group_by(state) %>%
  summarize(rules = list(unique(rule)), .groups = "drop") %>%
  mutate(
    # vapply() guarantees exactly one logical per state, even on empty input
    plurality = vapply(rules, function(x) "Plurality" %in% x, logical(1)),
    runoff = vapply(rules, function(x) "Runoff" %in% x, logical(1)),
    both = plurality & runoff
  )

sum(state_rule$plurality[!state_rule$both]) ## 15 plurality
sum(state_rule$runoff[!state_rule$both]) ## 56 runoff
sum(state_rule$both) ## 8 both

# No Survey Data for 7 countries, 28 elections
## Number of rows with a missing strong_sexism_imp
data_final_round %>% filter(is.na(strong_sexism_imp)) %>% nrow()
## States with a missing strong_sexism_imp, each listed once. De-duplicate
## with distinct() before pull(): wrapping the column in unique() inside
## pull() acts on the column selection and leaves repeated states in place.
data_final_round %>%
  filter(is.na(strong_sexism_imp)) %>%
  distinct(state) %>%
  pull(state)
## Number of distinct states with a missing strong_sexism_imp
data_final_round %>%
  filter(is.na(strong_sexism_imp)) %>%
  summarize(unique_states = n_distinct(state))


# Stats for Results Section-----

## Women won 6.8% of elections
## Share of rows with w_winner == 1. Unlike summary(), this returns the
## percentage whether w_winner is stored as a factor or as a number.
mean(data_final_round$w_winner == 1) * 100

## 77% of elections had no women candidates
## Share of first-round rows with w_candidates == 0.
sum(data_first_round$w_candidates == 0) / nrow(data_first_round) * 100

## Helper for the breakdowns below: within each combination of the grouping
## columns passed through `...`, report the percentage of rows with
## w_winner == 1, the count of such rows, and the number of rows.
summarize_w_winners <- function(df, ...) {
  df %>%
    group_by(...) %>%
    summarize(w_winner_per = mean(w_winner == 1) * 100,
              w_winner_n = sum(w_winner == 1),
              n = n(),
              # same grouping as summarize()'s default, without the message
              .groups = "drop_last")
}

## Women's representation is slightly better under plurality
summarize_w_winners(data_final_round, rule)

## Women's representation has improved over the decades
summarize_w_winners(data_final_round, decade)

## Women's representation strongest in Latin America and Europe
summarize_w_winners(data_final_round, region)

## Region x Decade
summarize_w_winners(data_final_round, region, decade)


# Runoff + CFBV Stats-----

## Runoffs held in 60% of elections
## First-round rows with runoff_held == 1 over rows where it is not missing.
with(data_first_round,
     sum(runoff_held == 1, na.rm = TRUE) / sum(!is.na(runoff_held)))

## CFBVs in 19% of all runoffs
## Denominator: final-round rows whose rule is "Runoff".
with(data_final_round,
     sum(cfbv == 1, na.rm = TRUE) / sum(rule == "Runoff", na.rm = TRUE))

## CFBVs in 32% of all second rounds
## Denominator: final-round rows with rule "Runoff" and round == 2.
with(data_final_round,
     sum(cfbv == 1, na.rm = TRUE) /
       sum(rule == "Runoff" & round == 2, na.rm = TRUE))


# Table 1: Regressions-----

## Model 1: binomial GLM of w_winner on rule and strong_sexism_imp, with the
## remaining terms entered additively.
model1 <- glm(
  w_winner ~ rule + strong_sexism_imp +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(model1)

## Model 2: same specification, with rule interacted with strong_sexism_imp.
model2 <- glm(
  w_winner ~ rule * strong_sexism_imp +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(model2)


# Figure 1: Sexism Over Time-----

## strong_sexism_imp against year, one panel per region, with a dashed
## smoother drawn without its confidence band.
ggplot(data_final_round, aes(year, strong_sexism_imp, group = region)) +
  geom_point(alpha = 0.25) +
  geom_smooth(linetype = "dashed", color = "black", se = FALSE) +
  facet_wrap(~region) +
  coord_cartesian(ylim = c(0, 40)) +
  theme_classic() +
  theme(text = element_text(size = 12),
        # `linewidth` replaces the deprecated `size` for line elements
        axis.line = element_line(color = "black", linewidth = 0.1),
        axis.ticks = element_line(color = "black", linewidth = 0.1),
        legend.position = "bottom") +
  labs(
    x = "",
    y = "% Voter Bias"
  )

## No plot is passed, so ggsave() writes the last plot displayed.
ggsave(path = "Figures/", filename = "Figure 1.png", width = 8, height = 6, device = "png", dpi = 300)

## Per region: number of rows, share of all final-round rows, and the mean,
## standard deviation and coefficient of variation of strong_sexism_imp.
data_final_round %>%
  group_by(region) %>%
  summarize(n = n(),
            # denominator follows the data instead of a hard-coded 457
            per = n / nrow(data_final_round),
            mean = mean(strong_sexism_imp, na.rm = TRUE),
            sd = sd(strong_sexism_imp, na.rm = TRUE),
            cv = sd / mean)


# Figure 2: Women Winners by Region-Decade-----

## Percentage and count of rows with w_winner == 1, and number of rows, for
## every region-decade cell.
figure_data <- data_final_round %>%
  group_by(region, decade) %>%
  summarize(w_winner_per = mean(w_winner == 1) * 100,
            w_winner_n = sum(w_winner == 1),
            n = n(),
            # same grouping as summarize()'s default, without the message
            .groups = "drop_last")

ggplot(figure_data, aes(decade, w_winner_per, group = region)) +
  geom_point() +
  geom_line() +
  facet_wrap(~region) +
  coord_cartesian(ylim = c(0, 30)) +
  theme_classic() +
  theme(text = element_text(size = 12),
        # `linewidth` replaces the deprecated `size` for line elements
        axis.line = element_line(color = "black", linewidth = 0.1),
        axis.ticks = element_line(color = "black", linewidth = 0.1),
        legend.position = "bottom") +
  labs(
    x = "",
    y = "% Women Winners"
  )

## No plot is passed, so ggsave() writes the last plot displayed.
ggsave(path = "Figures/", filename = "Figure 2.png", width = 8, height = 6, device = "png", dpi = 300)

## Final-round rows with w_winner == 1, one object per region.
w_winner_africa <- filter(data_final_round, region == "Africa", w_winner == 1)

w_winner_asia <- filter(data_final_round, region == "Asia", w_winner == 1)

w_winner_europe <- filter(data_final_round, region == "Europe", w_winner == 1)

w_winner_lam <- filter(
  data_final_round,
  region == "Latin America",
  w_winner == 1
)

## For each Latin American state: 1 if any of its rows has w_winner equal to
## "1", otherwise 0.
data_final_round %>%
  filter(region == "Latin America") %>%
  group_by(state) %>%
  summarize(max = max(ifelse(w_winner == "1", 1, 0)))


# Table 2: Women CFB Losers-----

## Rows with a runoff held and w_candidates above zero. w_runnerup is 1 when
## w_winner and cfbv are both 1 or both 0, and 0 when exactly one of them is 1.
runoff_with_women <- data_final_round %>%
  filter(runoff_held == 1, w_candidates > 0) %>%
  mutate(
    w_runnerup = case_when(
      w_winner == 1 & cfbv == 1 ~ 1,
      w_winner == 0 & cfbv == 0 ~ 1,
      w_winner == 1 & cfbv == 0 ~ 0,
      w_winner == 0 & cfbv == 1 ~ 0
    )
  )

## Rows with a runoff held and w_candidates equal to zero.
runoff_no_women <- filter(
  data_final_round,
  runoff_held == 1,
  w_candidates == 0
)

## Chile 2013 included in w_not_runnerup since both finalists were women
w_runnerup <- runoff_with_women %>% filter(w_runnerup == 1)
w_not_runnerup <- runoff_with_women %>% filter(w_runnerup == 0)

## Women first-round winners lost via CFB in 5 of 18 races (28%)
runoff_with_women %>%
  group_by(w_winner, cfbv) %>%
  summarize(n = n())

## Cases with Women CFB Losers
runoff_with_women %>%
  filter(w_winner == 0, cfbv == 1) %>%
  distinct(code)

## CFBVs occurred in 35% of runoffs without women
## Rows per cfbv value and their percentage of all rows in runoff_no_women.
runoff_no_women %>%
  count(cfbv) %>%
  mutate(per = n / sum(n) * 100)

# 7-point difference isn't stat. sig.
t.test(w_not_runnerup$cfbv, runoff_no_women$cfbv, paired = FALSE) %>%
  tidy()


# Table 3: Women CFB Winners-----

## Women runners-up won via CFB victories in 3 of 22 races (14%)
## Row counts for every w_winner x cfbv combination.
runoff_with_women %>%
  group_by(w_winner, cfbv) %>%
  summarize(n = n())

## Election codes of the rows with w_winner == 1 and cfbv == 1.
runoff_with_women %>%
  filter(w_winner == 1, cfbv == 1) %>%
  distinct(code)

## 21-point difference is stat. sig. = 0.02
t.test(w_runnerup$cfbv, runoff_no_women$cfbv, paired = FALSE) %>%
  tidy()


# Analysis: Vote Margins-----

## First-round rows of runoff elections with woman_first == 1.
r1_w_first <- filter(data_first_round, runoff_held == 1, woman_first == 1)

## First-round rows of runoff elections with woman_second == 1, leaving out
## Chile-2013.
r1_w_second <- filter(
  data_first_round,
  runoff_held == 1,
  woman_second == 1,
  code != "Chile-2013"
)

## First-round rows of every election whose final-round row has cfbv == 1.
all_cfbv_list <- data_final_round %>%
  filter(cfbv == 1) %>%
  pull(code)
all_cfbv <- filter(data_first_round, code %in% all_cfbv_list)

## Distribution of margin in each of the three sets.
summary(r1_w_second$margin)
sd(r1_w_second$margin)

summary(r1_w_first$margin)
sd(r1_w_first$margin)

summary(all_cfbv$margin)
sd(all_cfbv$margin)

## Two-sample t-tests of margin against the cfbv == 1 set.
t.test(r1_w_second$margin, all_cfbv$margin, paired = FALSE) %>%
  tidy()

t.test(r1_w_first$margin, all_cfbv$margin, paired = FALSE) %>%
  tidy()


# Analysis: Ideology and CFB Results-----

## w_more_conservative takes the value of first_more_conservative when
## w_winner is 1 and the opposite value when w_winner is 0.
parties <- read_csv("Data/CFBV Parties Data.csv") %>%
  mutate(w_more_conservative = case_when(
    w_winner == 1 & first_more_conservative == TRUE ~ TRUE,
    w_winner == 1 & first_more_conservative == FALSE ~ FALSE,
    w_winner == 0 & first_more_conservative == TRUE ~ FALSE,
    w_winner == 0 & first_more_conservative == FALSE ~ TRUE))

## Manual overrides for two elections (reason not recorded here -- TODO
## document why the derived value is replaced).
parties$w_more_conservative[parties$state == "Ecuador" & parties$election_year == 2023] <- FALSE
parties$w_more_conservative[parties$state == "Slovakia" & parties$election_year == 2019] <- TRUE


## Rows with w_winner == 0 and cfbv == 1, split by w_more_conservative.
parties %>% filter(w_more_conservative == TRUE & w_winner == 0 & cfbv == 1)

parties %>% filter(w_more_conservative == FALSE & w_winner == 0 & cfbv == 1)


# Table 4: Women Who Failed to Come from Behind-----

## Runoff rows with women candidates where w_winner == 0 and cfbv == 0.
w_no_cfb <- filter(runoff_with_women, w_winner == 0, cfbv == 0)

## First-round margin for those elections; low_margin is 1 when the margin
## is at or below the mean margin of all_cfbv, otherwise 0.
w_no_cfb <- data_first_round %>%
  filter(code %in% w_no_cfb[["code"]]) %>%
  select(code, margin) %>%
  mutate(low_margin = ifelse(margin <= mean(all_cfbv$margin), 1, 0))


# Table A1: Cases-----

unique(data$state)

## One row per state / rule / pres_type / region combination, listing the
## sorted election years and the number of first-round rows.
table_cases <- data_first_round %>%
  group_by(state, rule, pres_type, region) %>%
  summarize(years = paste(sort(unique(year)), collapse = ", "),
            n = n(),
            # return a plain, ungrouped table before handing it to flextable
            .groups = "drop")

## Word document holding the table, followed by an empty paragraph.
doc_cases <- read_docx() %>%
  body_add_flextable(flextable(table_cases)) %>%
  body_add_par("", style = "Normal")

print(doc_cases, target = "Cases.docx")


# Table A2: Robustness: Alternative Measures-----

## Same specifications as the main models, with strong_sexism_imp replaced
## first by gender_exclusion and then by female_edu_imputed; each measure is
## entered once additively and once interacted with rule.
modela1 <- glm(
  w_winner ~ rule + gender_exclusion +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(modela1)

modela2 <- glm(
  w_winner ~ rule * gender_exclusion +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(modela2)

modela3 <- glm(
  w_winner ~ rule + female_edu_imputed +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(modela3)

modela4 <- glm(
  w_winner ~ rule * female_edu_imputed +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_round
)
summ(modela4)



# Table A3: Robustness: Higher LDI Threshold-----

## NOTE(review): the heading refers to LDI, but the subset below is taken on
## `edi` while the models control for `ldi` -- confirm which index is meant.
data_final_robust <- filter(data_final_round, edi >= 0.5)

## Main specifications re-estimated on the restricted sample.
modela5 <- glm(
  w_winner ~ rule + strong_sexism_imp +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_robust
)
summ(modela5)

modela6 <- glm(
  w_winner ~ rule * strong_sexism_imp +
    pres_type + w_incumbent_running + incumbent_run +
    labor_part + ldi + region + decade,
  family = "binomial",
  data = data_final_robust
)
summ(modela6)

